{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Moorcheh Vector Store Demo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install Required Packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use %pip (not !pip) so the packages are installed into the running kernel's environment.\n",
    "# llama-index-vector-stores-moorcheh is the integration package for MoorchehVectorStore.\n",
    "%pip install -q llama-index llama-index-vector-stores-moorcheh moorcheh-sdk"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Required Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Welcome to the Demo of the Moorcheh Vector Store ---\n",
    "# --- Import the following packages ---\n",
    "import logging\n",
    "import os\n",
    "import sys\n",
    "from typing import Any, Callable, Dict, List, Optional, cast\n",
    "\n",
    "from IPython.display import Markdown, display\n",
    "from llama_index.core import (\n",
    "    VectorStoreIndex,\n",
    "    SimpleDirectoryReader,\n",
    "    StorageContext,\n",
    "    Settings,\n",
    ")\n",
    "from llama_index.core.base.embeddings.base_sparse import BaseSparseEmbedding\n",
    "from llama_index.core.bridge.pydantic import PrivateAttr\n",
    "from llama_index.core.schema import BaseNode, MetadataMode, TextNode\n",
    "from llama_index.core.vector_stores.types import (\n",
    "    BasePydanticVectorStore,\n",
    "    MetadataFilter,\n",
    "    MetadataFilters,\n",
    "    FilterOperator,\n",
    "    FilterCondition,\n",
    "    VectorStoreQuery,\n",
    "    VectorStoreQueryMode,\n",
    "    VectorStoreQueryResult,\n",
    ")\n",
    "from llama_index.core.vector_stores.utils import (\n",
    "    DEFAULT_TEXT_KEY,\n",
    "    legacy_metadata_dict_to_node,\n",
    "    metadata_dict_to_node,\n",
    "    node_to_metadata_dict,\n",
    ")\n",
    "from llama_index.vector_stores.moorcheh import MoorchehVectorStore\n",
    "from moorcheh_sdk import MoorchehClient"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configure Logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Logging Setup ---\n",
    "# Send INFO-level (and above) log records to stdout through a single root handler.\n",
    "# force=True removes handlers already attached to the root logger before configuring it,\n",
    "# so each record is printed once and re-running this cell never stacks extra handlers.\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Moorcheh API Key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Load the Moorcheh API key ---\n",
    "# In Google Colab the key is read from the notebook secrets; anywhere else the\n",
    "# MOORCHEH_API_KEY environment variable must already be set.\n",
    "try:\n",
    "    from google.colab import userdata\n",
    "except ImportError:\n",
    "    userdata = None  # not running in Colab\n",
    "\n",
    "api_key = userdata.get(\"MOORCHEH_API_KEY\") if userdata else None\n",
    "api_key = api_key or os.environ.get(\"MOORCHEH_API_KEY\")\n",
    "\n",
    "# Validate before exporting: os.environ only accepts strings, and a membership\n",
    "# check made after the assignment could never fail.\n",
    "if not api_key:\n",
    "    raise EnvironmentError(\"Environment variable MOORCHEH_API_KEY is not set\")\n",
    "\n",
    "# Export the key so code that reads the environment variable sees it too.\n",
    "os.environ[\"MOORCHEH_API_KEY\"] = api_key"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and Chunk Documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Chunking configuration (global LlamaIndex settings) ---\n",
    "Settings.chunk_size = 1024\n",
    "Settings.chunk_overlap = 20\n",
    "\n",
    "# --- Read every file found under ./documents ---\n",
    "reader = SimpleDirectoryReader(input_dir=\"./documents\")\n",
    "documents = reader.load_data()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initialize Vector Store and Create Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Initialize the Moorcheh Vector Store ---\n",
    "# Creates a Moorcheh Vector Store with the following parameters\n",
    "# For text-based namespaces, set namespace_type to \"text\" and vector_dimension to None\n",
    "# For vector-based namespaces, set namespace_type to \"vector\" and vector_dimension to the dimension of your uploaded vectors\n",
    "vector_store = MoorchehVectorStore(\n",
    "    api_key=api_key,  # loaded in the API key cell above\n",
    "    namespace=\"llamaindex_moorcheh\",\n",
    "    namespace_type=\"text\",\n",
    "    vector_dimension=None,\n",
    "    add_sparse_vector=False,\n",
    "    batch_size=100,\n",
    ")\n",
    "\n",
    "# --- Create a Vector Store Index using the Vector Store and given Documents ---\n",
    "# The storage context routes the index's vector storage to the Moorcheh store.\n",
    "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
    "index = VectorStoreIndex.from_documents(\n",
    "    documents, storage_context=storage_context\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query the Vector Store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Generate Response ---\n",
    "# --- Set Logging to DEBUG for more Detailed Outputs ---\n",
    "question = \"Which company has had the highest revenue in 2025 and why?\"\n",
    "\n",
    "# Answer produced by the LlamaIndex query engine built on top of the index.\n",
    "query_engine = index.as_query_engine()\n",
    "response = query_engine.query(question)\n",
    "\n",
    "# Answer produced by the vector store's own get_generative_answer helper, for comparison.\n",
    "moorcheh_response = vector_store.get_generative_answer(\n",
    "    query=question,\n",
    "    ai_model=\"anthropic.claude-3-7-sonnet-20250219-v1:0\",\n",
    ")\n",
    "\n",
    "display(Markdown(f\"<b>{response}</b>\"))\n",
    "print(\n",
    "    \"\\n\\n================================\\n\\n\",\n",
    "    response,\n",
    "    \"\\n\\n================================\\n\\n\",\n",
    ")\n",
    "print(\n",
    "    \"\\n\\n================================\\n\\n\",\n",
    "    moorcheh_response,\n",
    "    \"\\n\\n================================\\n\\n\",\n",
    ")\n",
    "\n",
    "# --- Filters for Metadata ---\n",
    "# Example filter definition; replace the placeholder value with a real document path.\n",
    "# Named metadata_filters so the builtin filter() is not shadowed for the rest of the session.\n",
    "metadata_filters = MetadataFilters(\n",
    "    filters=[\n",
    "        MetadataFilter(\n",
    "            key=\"file_path\",\n",
    "            value=\"insert the file path to the document here\",\n",
    "            operator=FilterOperator.EQ,\n",
    "        )\n",
    "    ],\n",
    "    condition=FilterCondition.AND,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
